*Cross tables and descriptive*

* Load the base panel, replacing whatever data is currently in memory
use "Data\before_reduction.dta", clear


* Restrict the panel to the variables used in the tables below
keep pnr aar age koen ambition_type_k_4_s fined ///
    final_educ_level_4 final_field_num final_field ///
    wage_start_mean_ambition wage_growth_ambition final_educ ///
    life_time_earnings goldin_quotient ever_top_manager ///
    avr_participation avr_ft_if_par ///
    age_at_first_kid age_at_first_kid_women ///
    wealth_50 p_life_wage p_life_earnings


* Each merge below matches on person (pnr) and year (aar).
* keep(master match) discards rows that exist only in the using file,
* and nogenerate suppresses the _merge indicator.

* Add p_wealth
merge 1:1 pnr aar using "Data\wealth_ambition_final.dta", ///
    keepusing(p_wealth) keep(master match) nogenerate

* Add couple_id individual_wage_growth_ambition old_ambition extreme_ambition
merge 1:1 pnr aar using "Data\kmeans ambition types.dta", ///
    keepusing(couple_id individual_wage_growth_ambition old_ambition extreme_ambition) ///
    keep(master match) nogenerate

* Add educ_level_field educ_level_field_num
merge 1:1 pnr aar using "Data\post_secondary_fields_full_population.dta", ///
    keepusing(educ_level_field educ_level_field_num) ///
    keep(master match) nogenerate

***Cross table with levels***

* Education-level indicators, one per value of fined (1-4).
* A logical expression evaluates to 0/1, so each indicator is 0 whenever
* fined differs from its code -- including when fined is missing.
gen primary    = (fined == 1)
gen secondary  = (fined == 2)
gen bachelor   = (fined == 3)
gen master_phd = (fined == 4)

**Table**
* Highest age observed for each person; rows with age==last_year are the
* ones the tables below are computed on.
bysort pnr: egen last_year = max(age)

* Renumber the k-means ambition types: 1->1, 4->2, 3->3, 2->4.
* Any other value (including missing) stays missing.
gen ambition_type_new = .
replace ambition_type_new = ambition_type_k_4_s if inlist(ambition_type_k_4_s, 1, 3)
replace ambition_type_new = 2 if ambition_type_k_4_s == 4
replace ambition_type_new = 4 if ambition_type_k_4_s == 2

* Population share of each type, measured over the age==last_year rows.
* type_flag is 0/1 on those rows and missing elsewhere, so its overall mean
* is the share of last-year rows belonging to type k. That constant is then
* written to every row of type k.
gen pop_share = .
forvalues k = 1/4 {
    gen type_flag = (ambition_type_new == `k') if age == last_year
    egen type_share = mean(type_flag)
    replace pop_share = type_share if ambition_type_new == `k'
    drop type_flag type_share
}

* Row labels shown in the exported table (esttab's label option)
label var primary    "Primary"
label var secondary  "Secondary"
label var bachelor   "Bachelor"
label var master_phd "Master & PhD"
label var pop_share  "Population share"

* Mean, SD and N of each indicator by ambition type, on the age==last_year rows
estpost tabstat primary secondary bachelor master_phd pop_share ///
    if age == last_year, ///
    by(ambition_type_new) columns(statistics) statistics(mean sd n)

* Write the same table twice: LaTeX (.tex) and a document version (.rtf)
foreach ext in tex rtf {
    esttab using "Results\tab_A1\cross_levels_full_pop_fields.`ext'", ///
        replace main(mean) aux(sd) nostar unstack nonumber nonote noobs ///
        label collabels(none) ///
        eqlabels("(i)" "(ii)" "(iii)" "(iv)" "Population") nomtitles
}


***Cross table with fields***
* primary and secondary are rebuilt below from educ_level_field_num,
* so remove any earlier versions first (capture: no error if absent).
capture drop primary secondary

*fields
* Note: final_field_num could have been used instead, so that the level
* corresponds only to the final_educ code on which the ambition types are
* assigned. However, the levels version was done with fined, so the same
* approach is kept here. This makes sense given that sorting is based on
* fined in a given year, not on the final_educ level.

* One indicator per value of educ_level_field_num, codes 1-8 in list order.
* Each indicator is 0 whenever the code differs, including when it is missing.
local code = 0
foreach fld in primary secondary educ_hum social_s business stem_f health_welfare other {
    local ++code
    gen `fld' = (educ_level_field_num == `code')
}


* Row labels shown in the exported table (esttab's label option)
label var primary        "Primary"
label var secondary      "Secondary"
label var educ_hum       "Education and Humanities"
label var social_s       "Social Science"
label var business       "Business"
label var stem_f         "STEM"
label var health_welfare "Health and Welfare"
label var other          "Other"
label var pop_share      "Population share"

* Mean, SD and N of each field indicator by ambition type, on the age==last_year rows
estpost tabstat primary secondary educ_hum social_s business stem_f ///
    health_welfare other pop_share ///
    if age == last_year, ///
    by(ambition_type_new) columns(statistics) statistics(mean sd n)

* Write the same table twice: LaTeX (.tex) and a document version (.rtf)
foreach ext in tex rtf {
    esttab using "Results\tab_A1\cross_fields_full_pop_fields.`ext'", ///
        replace main(mean) aux(sd) nostar unstack nonumber nonote noobs ///
        label collabels(none) ///
        eqlabels("(i)" "(ii)" "(iii)" "(iv)" "Population") nomtitles
}

***Descriptive table: Basics***

*Female share
* koen is compared as a string; female is 0 for every other value of koen.
gen female = (koen == "2")

*SD of wage growth at the program level
* Flag one row per person: the row of that person's earliest year (aar).
bysort pnr: egen earliest_aar = min(aar)
gen individual = 1 if aar == earliest_aar
drop earliest_aar

* Wage growth enters the SD only for flagged rows with a non-missing
* final_educ other than 1, old_ambition==1 and extreme_ambition==0.
gen growth_in_sample = individual_wage_growth_ambition ///
    if !inlist(final_educ, ., 1) & individual == 1 ///
    & old_ambition == 1 & extreme_ambition == 0

* SD within each final_educ group, stored on every row of that group
bysort final_educ: egen p_growth_sd = sd(growth_in_sample)

drop growth_in_sample

* Restore person-year order
sort pnr aar

* Row labels shown in the exported table (esttab's label option)
label var pop_share                "Population share"
label var female                   "Female share"
label var wage_start_mean_ambition "Starting wage"
label var wage_growth_ambition     "Wage growth"
label var p_wealth                 "Parental wealth at graduation"
label var p_growth_sd              "Wage growth SD"

* Mean, SD and N of the basic descriptives by ambition type, on the age==last_year rows
estpost tabstat pop_share female wage_start_mean_ambition ///
    wage_growth_ambition p_wealth p_growth_sd ///
    if age == last_year, ///
    by(ambition_type_new) columns(statistics) statistics(mean sd n)

* Write the same table twice: LaTeX (.tex) and a document version (.rtf)
foreach ext in tex rtf {
    esttab using "Results\tab_A1\descriptive_basics.`ext'", ///
        replace main(mean) aux(sd) nostar unstack nonumber nonote noobs ///
        label collabels(none) ///
        eqlabels("(i)" "(ii)" "(iii)" "(iv)" "Population") nomtitles
}
